########################################################################################
### Replication file for Congressional Representation by Petition:                   ###
### Assessing the Voices of the Voteless in a Comprehensive New Database, 1789-1949  ###
###                                                                                  ###
### Authors: Maggie Blackhawk, Daniel Carpenter, Tobias Resch and Benjamin Schneer   ###
###                                                                                  ###
### File: contours_replication2.R                                                    ###
########################################################################################



# Clear the workspace so the replication starts from a clean session
rm(list=ls())

# Set directory to location where replication files are stored
setwd("~/replication")

# Load packages and functions
# install.packages(c("data.table","xtable"))

# library() stops with an error if a package is not installed, whereas
# require() only warns and returns FALSE, letting the script fail later
# with a much less obvious message.
library(data.table)
library(xtable)
# NOTE(review): check.model.accuracy(), used below, is not defined in this
# file; presumably it is provided by this helper script -- confirm.
source("additional_functions.R")

# The code below uses `tdata.prva.final`, which is not created anywhere in
# this script; presumably this .RData file provides it -- confirm.
load("data/tdata_prva_final.RData")


# Check model accuracy data

# Per-class performance of the broad-topic classifier, comparing the
# predicted label (topic.broad) with the actual label (actual.topic.broad).
# NOTE(review): check.model.accuracy() is defined outside this file; the code
# below assumes its result has a `pred.class` column followed by three metric
# columns in positions 2-4 -- confirm against the helper.
broad.classifier.performance <- check.model.accuracy(tdata.prva.final$topic.broad, tdata.prva.final$actual.topic.broad)

# Attach the number of rows per actual class (column N). all.x = TRUE keeps
# classes that have no rows with that actual label; their N is NA after the
# merge and is set to 0 below.
broad.classifier.performance <- merge(broad.classifier.performance, tdata.prva.final[, .N, by = .("pred.class" = actual.topic.broad)], by = "pred.class", all.x = TRUE)

# Coerce the three metric columns to numeric. Going through as.character()
# first means a factor column yields its labels rather than its integer codes.
# lapply() always returns one list element per column, so the assignment is
# column-for-column regardless of how many rows there are (sapply() would
# return a matrix, a vector or a list depending on the input).
broad.classifier.performance[, 2:4] <- lapply(broad.classifier.performance[, 2:4], function(x) as.numeric(as.character(x)))

broad.classifier.performance$N[is.na(broad.classifier.performance$N)] <- 0


# Find precision, recall, etc.: average of each metric column across classes,
# weighted by the number of rows with that actual class.
lapply(broad.classifier.performance[, 2:4], function(x) weighted.mean(x, w = broad.classifier.performance$N, na.rm = TRUE))

# Find pct correct match: rows where predicted and actual label agree,
# divided by the total number of rows (rows with an NA comparison count as
# not matching because they stay in the denominator).
tdata.prva.final[, sum(topic.broad == actual.topic.broad, na.rm = TRUE) / .N]



################
### Table A2 ###
################

# Create table showing mismatches: cross-tabulation of predicted
# (topic.broad) against actual (actual.topic.broad) labels.

# Note: both columns of tdata.prva.final are converted to factors in place,
# so everything that runs after this point sees factors.
tdata.prva.final[,topic.broad:=factor(topic.broad)]
# The actual labels are given the same level set as the predicted labels so
# that the table has identical rows and columns. Actual labels that never
# occur as a prediction become NA here and are therefore left out of the
# table by table()'s default NA handling.
tdata.prva.final[,actual.topic.broad:=factor(actual.topic.broad,levels=sort(unique(topic.broad)))]

out <- tdata.prva.final[,table(topic.broad,actual.topic.broad)]

# write.csv() fails if the target directory is missing, so create it first.
if (!dir.exists("figs")) dir.create("figs")
write.csv(out,file="figs/taba2.csv")


################
### Table A3 ###
################

# One row per (predicted, actual) label pair:
#   N   = number of rows with that pair
#   pct = N as a share of all rows in tdata.prva.final
out2 <- tdata.prva.final[, .(.N, pct = .N / nrow(tdata.prva.final)), by = .(topic.broad, actual.topic.broad)]
# topic.N = total number of rows carrying each predicted label
out2[, topic.N := sum(N), by = topic.broad]

# Pairs where prediction and actual label differ, most frequent first.
# Not used further in the visible part of this script; kept for inspection.
# Rows whose actual label is NA satisfy neither this filter nor the next one.
out2.mismatches <- out2[topic.broad != actual.topic.broad][order(N, decreasing = TRUE)]

# Pairs where prediction and actual label agree. Despite its name, N.correct
# is a share: matching rows divided by all rows with that predicted label.
out2.matches <- out2[topic.broad == actual.topic.broad]
out2.matches[, N.correct := N / topic.N]
out2.matches <- out2.matches[order(N.correct, decreasing = TRUE)]

# write.csv() fails if the target directory is missing, so create it first.
if (!dir.exists("figs")) dir.create("figs")
write.csv(out2.matches[, .(Topic = topic.broad, N = topic.N, "Share Correctly Classified" = round(N.correct, 2))], file = "figs/taba3.csv", row.names = FALSE)

